load data
USE_DC <- TRUE
text = read.csv('/Users/roso8920/Emotive Computing Dropbox/Rosy Southwell/EML Rosy/EMLLM/info/ia_label_mapping_opt_surprisal.csv')
if (USE_DC){eeg_dir = '/Volumes/Blue1TB/EEG_processed/unfolded_FRP_reparsed_v6/n400_stats_recomputed/'
} else {eeg_dir = '/Volumes/Blue1TB/EEG_processed/unfolded_FRP_reparsed_v6/n400_stats_nodc/'
}
file_pat = '_reading_N400_stats\\.csv$'
files=list.files(path = eeg_dir, pattern= file_pat)
Loop over participants
all<-c()
i<-0
for (f in files){
i=i+1
df <-read.csv(paste0(eeg_dir,f))
p <- strsplit(f,'_')
pID <- paste(p[[1]][1],p[[1]][2],sep='_')
df$pID <- pID
# df<- df %>% dplyr::select(c("pID", "type","latency","fixDur","task","identifier" , "page_fixation_ix","IA_ID","n400_magnitude_CPz","n400_latency_CPz" ))
df<- df %>% filter(task=='reading') %>% droplevels()
all[[i]] <- df
}
df <- do.call(rbind, all)
# Merge with text info for surprisal values
df <- df %>% merge(text, on=c("IA_ID","identifier"))
df <- df %>% rename(surprisalText=opt.125m_surprisal_wholetext, surprisalPage = opt.125m_surprisal_page,fixDur=duration)
# drop rows with no IA
df <- drop_na(df, "IA_LABEL")
# drop rows with punc
df <- df %>% filter(! IA_LABEL %in% c('.',',','-','--',';',':',"'",'?','!'))
feats <- grep("n400|fixDur",colnames(df), value=T)
feats_eeg <- grep("n400",colnames(df), value=T)
plots
df_long <- df %>% pivot_longer(all_of(c(feats,'logFixDur')), names_to="feature",values_to="value")
df_long <- df_long %>% mutate(channel=str_match(feature, 'n400_\\w*_([[:alnum:]]{2,5})$')[,2],
feature_type=str_match(feature, '(n400_\\w*)_[[:alnum:]]{2,5}$')[,2])
# compute lower and upper whiskers for each group
ylims <- df_long %>%
group_by(feature_type) %>%
summarise(Q1 = quantile(value, 1/100), Q3 = quantile(value, 99/100)) %>%
ungroup()
p1<-ggplot(df_long %>% filter(feature_type=='n400_mean')) +
geom_density(aes(x=value, fill=channel), alpha=0.3) + xlim(-1000,1000) + ggtitle('n400_mean')
p2<-ggplot(df_long %>% filter(feature_type=='n400_min_magnitude')) +
geom_density(aes(x=value, fill=channel), alpha=0.3) + xlim(-1000,1000) + ggtitle('n400_min')
p3<-ggplot(df_long %>% filter(feature_type=='n400_max_magnitude')) +
geom_density(aes(x=value, fill=channel), alpha=0.3) + xlim(-1000,1000) + ggtitle('n400_max')
grid.arrange(p1,p2,p3)

ggplot(df)+
geom_density(aes(x=fixDur))+
scale_x_log10()

ggplot(df)+
geom_density(aes(x=surprisalText))+
scale_x_log10()

ggplot(df_long)+
geom_violin(aes(x=value,y=factor(channel), fill=factor(channel),outliers = FALSE), alpha=0.3)+
facet_wrap(~feature_type, scales="free")

ggplot(df_long %>% filter(feature_type =='n400_mean'))+
geom_point(aes(x=surprisalText,y=value, color=factor(channel)), alpha=0.05, size=0.5)+
facet_wrap(~channel, scales="free")

repeat some plots after transformatiions
ggplot(df,aes(x=logSurprisalText, y=logFixDur)) +
geom_point(size=1, alpha=0.01)+stat_cor(method="pearson")

ggplot(df_long %>% filter(feature_type =='n400_mean'),aes(x=logSurprisalText,y=value, color=factor(channel)))+
geom_point(alpha=0.01, size=0.5)+stat_cor(method="pearson", size=2, r.accuracy=0.01, p.accuracy=0.001, label.x=-7.5, label.y=1000)+
facet_wrap(~channel, scales="free")

ggplot(df_long %>% filter(feature_type =='n400_max_magnitude'),aes(x=logSurprisalText,y=value, color=factor(channel)))+
geom_point(alpha=0.01, size=0.5)+stat_cor(method="pearson", size=2, r.accuracy=0.01, p.accuracy=0.001, label.x=-7.5, label.y=1000)+
facet_wrap(~channel, scales="free")

ggplot(df_long %>% filter(feature_type =='n400_min_magnitude'),aes(x=logSurprisalText,y=value, color=factor(channel)))+
geom_point(alpha=0.01, size=0.5)+stat_cor(method="pearson", size=2, r.accuracy=0.01, p.accuracy=0.001, label.x=-7.5, label.y=1000)+
facet_wrap(~channel, scales="free")

LME stats
predict EEG from LOG surprisal and fixDur
m.l.m <- lmer(n400_mean_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
m.l.min <- lmer(n400_max_magnitude_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
m.l.max <- lmer(n400_min_magnitude_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
m.l.zc <- lmer(n400_zero_crossings_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
tab_model(m.l.m, m.l.min, m.l.max, m.l.zc, show.ci=F)
|
|
n 400 mean C Pz
|
n 400 max magnitude C Pz
|
n 400 min magnitude C Pz
|
n 400 zero crossings C Pz
|
|
Predictors
|
Estimates
|
p
|
Estimates
|
p
|
Estimates
|
p
|
Estimates
|
p
|
|
(Intercept)
|
-2.03
|
0.592
|
71.61
|
<0.001
|
-79.35
|
<0.001
|
2.13
|
<0.001
|
|
logFixDur
|
0.59
|
0.390
|
0.49
|
0.484
|
1.06
|
0.157
|
-0.01
|
0.542
|
|
logSurprisalText
|
-0.82
|
0.001
|
-0.91
|
<0.001
|
-0.67
|
0.009
|
-0.00
|
0.975
|
|
Random Effects
|
|
σ2
|
10319.24
|
10647.05
|
12018.42
|
5.00
|
|
τ00
|
44.81 pID
|
1083.91 pID
|
493.09 pID
|
0.52 pID
|
|
|
25.46 identifier
|
29.95 identifier
|
30.35 identifier
|
0.02 identifier
|
|
ICC
|
0.01
|
0.09
|
0.04
|
0.10
|
|
N
|
50 identifier
|
50 identifier
|
50 identifier
|
50 identifier
|
|
|
91 pID
|
91 pID
|
91 pID
|
91 pID
|
|
Observations
|
104272
|
104272
|
104272
|
104272
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.007
|
0.000 / 0.095
|
0.000 / 0.042
|
0.000 / 0.096
|
dotplot(ranef(m.l.m))
## $pID

##
## $identifier

predict fixDur from surprisal
m.d <- lmer(fixDur ~ (1|identifier) + (1|pID) + logSurprisalText, data=df)
tab_model(m.d)
|
|
fix Dur
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
208.40
|
202.58 – 214.22
|
<0.001
|
|
logSurprisalText
|
1.81
|
1.38 – 2.24
|
<0.001
|
|
Random Effects
|
|
σ2
|
8839.40
|
|
τ00 pID
|
729.69
|
|
τ00 identifier
|
31.32
|
|
ICC
|
0.08
|
|
N identifier
|
50
|
|
N pID
|
91
|
|
Observations
|
104272
|
|
Marginal R2 / Conditional R2
|
0.001 / 0.080
|
predict fixation fixDur from EEG features
m.eye <- lmer(fixDur ~ (1|identifier) + (1|pID) + n400_mean_CPz, data=df)
tab_model(m.eye)
|
|
fix Dur
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
210.20
|
204.39 – 216.01
|
<0.001
|
|
n400 mean CPz
|
0.01
|
0.00 – 0.01
|
0.002
|
|
Random Effects
|
|
σ2
|
8844.36
|
|
τ00 pID
|
729.84
|
|
τ00 identifier
|
32.26
|
|
ICC
|
0.08
|
|
N identifier
|
50
|
|
N pID
|
91
|
|
Observations
|
104272
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.079
|
predict EEG and surprisal from previous IA surprisal
mp.m <- lmer(n400_mean_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText + prev_logSurprisalText, data=df)
tab_model(mp.m)
|
|
n 400 mean C Pz
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
-1.58
|
-9.06 – 5.90
|
0.678
|
|
logFixDur
|
0.60
|
-0.76 – 1.97
|
0.385
|
|
logSurprisalText
|
-0.77
|
-1.23 – -0.30
|
0.001
|
|
prev logSurprisalText
|
-0.53
|
-1.00 – -0.06
|
0.026
|
|
Random Effects
|
|
σ2
|
10266.18
|
|
τ00 pID
|
45.97
|
|
τ00 identifier
|
25.85
|
|
ICC
|
0.01
|
|
N identifier
|
50
|
|
N pID
|
91
|
|
Observations
|
102838
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.007
|
mp.s <- lm( logSurprisalText ~ prev_logSurprisalText, data=df)
tab_model(mp.s)
|
|
log Surprisal Text
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
0.89
|
0.88 – 0.90
|
<0.001
|
|
prev logSurprisalText
|
0.15
|
0.14 – 0.15
|
<0.001
|
|
Observations
|
102838
|
|
R2 / R2 adjusted
|
0.022 / 0.022
|
model including bool for refixation
mr.m <- lmer(n400_mean_CPz ~ (1|identifier) + (1|pID) + logFixDur + refixation*logSurprisalText, data=df)
tab_model(mr.m)
|
|
n 400 mean C Pz
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
-2.94
|
-10.43 – 4.55
|
0.442
|
|
logFixDur
|
0.62
|
-0.74 – 1.97
|
0.373
|
|
refixation [1]
|
1.42
|
-0.18 – 3.03
|
0.082
|
|
logSurprisalText
|
-0.46
|
-1.20 – 0.28
|
0.225
|
refixation [1] * logSurprisalText
|
-0.65
|
-1.59 – 0.29
|
0.176
|
|
Random Effects
|
|
σ2
|
10319.12
|
|
τ00 pID
|
44.82
|
|
τ00 identifier
|
25.40
|
|
ICC
|
0.01
|
|
N identifier
|
50
|
|
N pID
|
91
|
|
Observations
|
104272
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.007
|
Anova(mr.m)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: n400_mean_CPz
## Chisq Df Pr(>Chisq)
## logFixDur 0.7943 1 0.3727947
## refixation 1.4121 1 0.2347109
## logSurprisalText 12.9557 1 0.0003189 ***
## refixation:logSurprisalText 1.8291 1 0.1762313
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot_model(mr.m,type = "emm", terms=c("logSurprisalText", "refixation"))

tb<-tableone::CreateTableOne(data = df, vars = c("n400_mean_CPz", "logFixDur"), strata = 'refixation', test=T)
knitr::kable(print(tb))
## Stratified by refixation
## 0 1 p test
## n 36498 67774
## n400_mean_CPz (mean (SD)) -0.14 (104.67) 0.48 (100.36) 0.351
## logFixDur (mean (SD)) 5.27 (0.44) 5.24 (0.49) <0.001
| n |
36498 |
67774 |
|
|
| n400_mean_CPz (mean (SD)) |
-0.14 (104.67) |
0.48 (100.36) |
0.351 |
|
| logFixDur (mean (SD)) |
5.27 (0.44) |
5.24 (0.49) |
<0.001 |
|
predict EEG from surprisal and fixDur with random slope of surprisal for participant
m.rs.m <- lmer(n400_mean_CPz ~ (1+logSurprisalText|pID) + (1|identifier) + logSurprisalText + logFixDur, data=df)
m.rs.min <- lmer(n400_max_magnitude_CPz ~ (1+logSurprisalText|pID) + (1|identifier) + logFixDur + logSurprisalText, data=df)
m.rs.max <- lmer(n400_min_magnitude_CPz ~(1+logSurprisalText|pID) + (1|identifier) + logFixDur + logSurprisalText, data=df)
tab_model(m.rs.m, m.rs.min, m.rs.max, show.ci=F)
|
|
n 400 mean C Pz
|
n 400 max magnitude C Pz
|
n 400 min magnitude C Pz
|
|
Predictors
|
Estimates
|
p
|
Estimates
|
p
|
Estimates
|
p
|
|
(Intercept)
|
-2.02
|
0.594
|
71.59
|
<0.001
|
-79.40
|
<0.001
|
|
logSurprisalText
|
-0.85
|
0.005
|
-0.92
|
0.004
|
-0.67
|
0.029
|
|
logFixDur
|
0.61
|
0.378
|
0.51
|
0.473
|
1.08
|
0.151
|
|
Random Effects
|
|
σ2
|
10313.06
|
10639.21
|
12013.52
|
|
τ00
|
60.26 pID
|
1080.25 pID
|
507.62 pID
|
|
|
25.69 identifier
|
30.13 identifier
|
30.55 identifier
|
|
τ11
|
3.27 pID.logSurprisalText
|
4.14 pID.logSurprisalText
|
2.63 pID.logSurprisalText
|
|
ρ01
|
-0.65 pID
|
-0.01 pID
|
-0.22 pID
|
|
ICC
|
0.01
|
0.10
|
0.04
|
|
N
|
91 pID
|
91 pID
|
91 pID
|
|
|
50 identifier
|
50 identifier
|
50 identifier
|
|
Observations
|
104272
|
104272
|
104272
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.007
|
0.000 / 0.095
|
0.000 / 0.042
|
dotplot(ranef(m.rs.m))
## $pID

##
## $identifier

Modeling with comprehension/MW
Load Behavioural data
df_comp <- read.csv('/Users/roso8920/Emotive Computing Dropbox/Rosy Southwell/EyeMindLink/Processed/Behaviour/EML1_page_level.csv')
df_comp <- df_comp %>% rename(pID=ParticipantID) %>%
mutate(Page=PageNum-1,
identifier=paste0(Text, Page))
df <- merge(df, df_comp, on=c("pID","identifier"))
df_mw <- df %>% drop_na(MW) %>% mutate(MW=as.factor(MW))
model FIXATION LEVEL N400 ~ surprisal*MW (caveat - MW is at page level)
m.mw <- lmer(n400_mean_CPz ~ (1+logSurprisalText|pID) + (1|identifier) + logSurprisalText*MW , data=df_mw)
Anova(m.mw)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: n400_mean_CPz
## Chisq Df Pr(>Chisq)
## logSurprisalText 0.1782 1 0.6730
## MW 0.5418 1 0.4617
## logSurprisalText:MW 1.4514 1 0.2283
tab_model(m.mw)
|
|
n 400 mean C Pz
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
2.44
|
-2.78 – 7.65
|
0.360
|
|
logSurprisalText
|
0.25
|
-1.13 – 1.63
|
0.724
|
|
MW [1]
|
3.12
|
-1.63 – 7.86
|
0.198
|
|
logSurprisalText * MW [1]
|
-1.32
|
-3.47 – 0.83
|
0.228
|
|
Random Effects
|
|
σ2
|
9813.79
|
|
τ00 pID
|
264.34
|
|
τ00 identifier
|
48.37
|
|
τ11 pID.logSurprisalText
|
5.19
|
|
ρ01 pID
|
-0.89
|
|
ICC
|
0.03
|
|
N pID
|
91
|
|
N identifier
|
20
|
|
Observations
|
22322
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.026
|
plot_model(m.mw,type = "emm", terms=c("logSurprisalText", "MW"))

plot_model(m.mw,type = "emm", terms=c("logFixDur", "MW"))
## `logFixDur` was not found in model terms. Maybe misspelled?
## Can't compute estimated marginal means, 'emmeans::emmeans()' returned an error.
##
## Reason: No variable named logFixDur in the reference grid
## You may try 'ggpredict()' or 'ggeffect()'.
## NULL
emmeans(m.mw, pairwise ~ MW, p.adjust = "fdr")
## $emmeans
## MW emmean SE df asymp.LCL asymp.UCL
## 0 2.70 2.42 Inf -2.042 7.44
## 1 4.41 2.65 Inf -0.782 9.61
##
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## 0 - 1 -1.72 2.01 Inf -0.852 0.3942
##
## Degrees-of-freedom method: asymptotic
bobby’s mode: MW label at page level, surprosal and n400 at fixation level
# mB <- allFit(lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw))
mB <- lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw, control=lmerControl(optimizer='Nelder_Mead'))
summary(mB)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: n400_mean_CPz ~ 1 + MW * logSurprisalText + (MW * logSurprisalText |
## pID) + (logSurprisalText | pID:identifier)
## Data: df_mw
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 268150.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.8036 -0.5495 0.0163 0.5553 6.1471
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pID:identifier (Intercept) 1439.761 37.944
## logSurprisalText 8.569 2.927 -1.00
## pID (Intercept) 148.040 12.167
## MW1 283.648 16.842 -0.49
## logSurprisalText 4.536 2.130 0.30 -0.89
## MW1:logSurprisalText 3.799 1.949 -0.79 0.32 -0.44
## Residual 9371.358 96.806
## Number of obs: 22322, groups: pID:identifier, 301; pID, 91
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.1841 3.4506 71.6234 0.923 0.359
## MW1 5.8268 5.6940 76.4198 1.023 0.309
## logSurprisalText 0.4260 0.7361 89.4178 0.579 0.564
## MW1:logSurprisalText -1.4919 1.1792 144.0161 -1.265 0.208
##
## Correlation of Fixed Effects:
## (Intr) MW1 lgSrpT
## MW1 -0.588
## lgSrprslTxt -0.397 0.200
## MW1:lgSrprT 0.229 -0.489 -0.602
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.54122 (tol = 0.002, component 1)
tab_model(mB)
|
|
n 400 mean C Pz
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
3.18
|
-3.58 – 9.95
|
0.356
|
|
MW [1]
|
5.83
|
-5.33 – 16.99
|
0.306
|
|
logSurprisalText
|
0.43
|
-1.02 – 1.87
|
0.563
|
|
MW [1] * logSurprisalText
|
-1.49
|
-3.80 – 0.82
|
0.206
|
|
Random Effects
|
|
σ2
|
9371.36
|
|
τ00 pID:identifier
|
1439.76
|
|
τ00 pID
|
148.04
|
|
τ11 pID:identifier.logSurprisalText
|
8.57
|
|
τ11 pID.MW1
|
283.65
|
|
τ11 pID.logSurprisalText
|
4.54
|
|
τ11 pID.MW1:logSurprisalText
|
3.80
|
|
ρ01 pID:identifier
|
-1.00
|
|
ρ01 pID.MW1
|
-0.49
|
|
ρ01 pID.logSurprisalText
|
0.30
|
|
ρ01 pID.MW1:logSurprisalText
|
-0.79
|
|
ICC
|
0.13
|
|
N pID
|
91
|
|
N identifier
|
20
|
|
Observations
|
22322
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.131
|
min n400
# mB <- allFit(lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw))
mB.min <- lmer(n400_min_magnitude_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw, control=lmerControl(optimizer='Nelder_Mead'))
summary(mB.min)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## n400_min_magnitude_CPz ~ 1 + MW * logSurprisalText + (MW * logSurprisalText |
## pID) + (logSurprisalText | pID:identifier)
## Data: df_mw
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 271688.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.7679 -0.5431 0.0392 0.5765 5.4070
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pID:identifier (Intercept) 1644.07 40.547
## logSurprisalText 12.70 3.564 -1.00
## pID (Intercept) 2410.96 49.101
## MW1 13611.72 116.669 0.30
## logSurprisalText 10.95 3.310 -0.53 0.05
## MW1:logSurprisalText 438.46 20.940 -0.50 -0.96 -0.07
## Residual 10873.24 104.275
## Number of obs: 22322, groups: pID:identifier, 301; pID, 91
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -73.4987 6.4000 32.9731 -11.484 0.000000000000459 ***
## MW1 7.5202 16.9979 242.8090 0.442 0.659
## logSurprisalText 0.2887 0.8523 76.7971 0.339 0.736
## MW1:logSurprisalText -1.5319 2.9269 31.9680 -0.523 0.604
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) MW1 lgSrpT
## MW1 0.001
## lgSrprslTxt -0.456 0.114
## MW1:lgSrprT -0.189 -0.900 -0.251
## optimizer (Nelder_Mead) convergence code: 4 (failure to converge in 10000 evaluations)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## failure to converge in 10000 evaluations
tab_model(mB.min)
|
|
n 400 min magnitude C Pz
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
-73.50
|
-86.04 – -60.95
|
<0.001
|
|
MW [1]
|
7.52
|
-25.80 – 40.84
|
0.658
|
|
logSurprisalText
|
0.29
|
-1.38 – 1.96
|
0.735
|
|
MW [1] * logSurprisalText
|
-1.53
|
-7.27 – 4.21
|
0.601
|
|
Random Effects
|
|
σ2
|
10873.24
|
|
τ00 pID:identifier
|
1644.07
|
|
τ00 pID
|
2410.96
|
|
τ11 pID:identifier.logSurprisalText
|
12.70
|
|
τ11 pID.MW1
|
13611.72
|
|
τ11 pID.logSurprisalText
|
10.95
|
|
τ11 pID.MW1:logSurprisalText
|
438.46
|
|
ρ01 pID:identifier
|
-1.00
|
|
ρ01 pID.MW1
|
0.30
|
|
ρ01 pID.logSurprisalText
|
-0.53
|
|
ρ01 pID.MW1:logSurprisalText
|
-0.50
|
|
ICC
|
0.43
|
|
N pID
|
91
|
|
N identifier
|
20
|
|
Observations
|
22322
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.428
|
max n400
# mB <- allFit(lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw))
mB.max <- lmer(n400_max_magnitude_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw, control=lmerControl(optimizer='Nelder_Mead'))
summary(mB.max)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## n400_max_magnitude_CPz ~ 1 + MW * logSurprisalText + (MW * logSurprisalText |
## pID) + (logSurprisalText | pID:identifier)
## Data: df_mw
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 269286.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -8.5622 -0.5499 -0.0016 0.5598 6.1442
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pID:identifier (Intercept) 1937.6 44.018
## logSurprisalText 79.9 8.939 -0.26
## pID (Intercept) 12337.9 111.076
## MW1 3898.6 62.439 -0.99
## logSurprisalText 345.2 18.581 1.00 -0.99
## MW1:logSurprisalText 1771.0 42.083 -0.99 0.96 -0.99
## Residual 9631.1 98.138
## Number of obs: 22322, groups: pID:identifier, 301; pID, 91
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 77.4900 12.3284 39.0833 6.286 0.000000206 ***
## MW1 5.2379 9.3360 9.3264 0.561 0.588
## logSurprisalText 0.7178 2.2145 12.9793 0.324 0.751
## MW1:logSurprisalText -2.1979 4.7796 9.6507 -0.460 0.656
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) MW1 lgSrpT
## MW1 -0.813
## lgSrprslTxt 0.808 -0.598
## MW1:lgSrprT -0.837 0.522 -0.891
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
tab_model(mB.max)
|
|
n 400 max magnitude C Pz
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
77.49
|
53.33 – 101.65
|
<0.001
|
|
MW [1]
|
5.24
|
-13.06 – 23.54
|
0.575
|
|
logSurprisalText
|
0.72
|
-3.62 – 5.06
|
0.746
|
|
MW [1] * logSurprisalText
|
-2.20
|
-11.57 – 7.17
|
0.646
|
|
Random Effects
|
|
σ2
|
9631.14
|
|
τ00 pID:identifier
|
1937.62
|
|
τ00 pID
|
12337.85
|
|
τ11 pID:identifier.logSurprisalText
|
79.90
|
|
τ11 pID.MW1
|
3898.61
|
|
τ11 pID.logSurprisalText
|
345.24
|
|
τ11 pID.MW1:logSurprisalText
|
1771.00
|
|
ρ01 pID:identifier
|
-0.26
|
|
ρ01 pID.MW1
|
-0.99
|
|
ρ01 pID.logSurprisalText
|
1.00
|
|
ρ01 pID.MW1:logSurprisalText
|
-0.99
|
|
ICC
|
0.58
|
|
N pID
|
91
|
|
N identifier
|
20
|
|
Observations
|
22322
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.585
|
Eye-mind-linkage metric at page level
note atanh transform for correlation when it is to be used in model
eml_page <- df %>% group_by(pID, identifier) %>% summarise(
count=n(),
cor_n400meanCPz_logSurprisalText = atanh(cor(logSurprisalText, n400_mean_CPz)),
cor_n400minCPz_logSurprisalText = atanh(cor(logSurprisalText, n400_min_magnitude_CPz)),
cor_n400maxCPz_logSurprisalText = atanh(cor(logSurprisalText, n400_max_magnitude_CPz)),
cor_logFixDur_logSurprisalText = atanh(cor(logSurprisalText, logFixDur)),
meanLogSurprisalText=mean(logSurprisalText)
) %>% drop_na() %>%
filter(count>2) # remove instances with single or two fixation on page as correlation coeff will be 1 or -1
eml_page[Reduce(`&`, lapply(eml_page, is.finite)),]
## # A tibble: 0 × 8
## # Groups: pID [0]
## # … with 8 variables: pID <chr>, identifier <fct>, count <int>,
## # cor_n400meanCPz_logSurprisalText <dbl>,
## # cor_n400minCPz_logSurprisalText <dbl>,
## # cor_n400maxCPz_logSurprisalText <dbl>,
## # cor_logFixDur_logSurprisalText <dbl>, meanLogSurprisalText <dbl>
df_page <- left_join(eml_page, df_comp, on=c('pID', 'identifer'))
df_page$MW = as.factor(df_page$MW)
# plot the new metrics
p1<-ggplot(df_page %>% drop_na(MW))+
geom_density( aes(x=cor_n400meanCPz_logSurprisalText, color=MW))
p2<-ggplot(df_page %>% drop_na(MW))+
geom_density( aes(x=cor_logFixDur_logSurprisalText, color=MW))
p3<-ggplot(df_page %>% drop_na(MW))+
geom_density( aes(x=cor_n400minCPz_logSurprisalText, color=MW))
p4<-ggplot(df_page %>% drop_na(MW))+
geom_density( aes(x=cor_n400maxCPz_logSurprisalText, color=MW))
grid.arrange(p1,p2,p3,p4)

# plot by participantID
ggplot(df_page, aes(x=cor_n400meanCPz_logSurprisalText, y=pID)) +
geom_boxplot( horizontal = TRUE, outlier.colour="red",
outlier.fill="red",
outlier.size=0.2)

binomial regression: predict page level MW from page level correlations with surprisal
m.mw.mean <- glmer(MW ~ cor_n400meanCPz_logSurprisalText + (1|identifier) + (1+cor_n400meanCPz_logSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
plot_model(m.mw.mean,type = "emm", terms=c("cor_n400meanCPz_logSurprisalText"))

m.mw.min <- glmer(MW ~ cor_n400minCPz_logSurprisalText + (1|identifier) + (1+cor_n400minCPz_logSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
plot_model(m.mw.min,type = "emm", terms=c("cor_n400minCPz_logSurprisalText"))

m.mw.max <- glmer(MW ~ cor_n400maxCPz_logSurprisalText + (1|identifier) + (1+cor_n400maxCPz_logSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
plot_model(m.mw.max,type = "emm", terms=c("cor_n400maxCPz_logSurprisalText"))

m.mw.all <- glmer(MW ~ cor_n400meanCPz_logSurprisalText + cor_n400minCPz_logSurprisalText +cor_n400maxCPz_logSurprisalText + (1|identifier) + (1|pID), data=df_page %>% drop_na(MW), family = "binomial")
tab_model(m.mw.mean, m.mw.min, m.mw.max)
|
|
MW
|
MW
|
MW
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
Odds Ratios
|
CI
|
p
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
0.47
|
0.47 – 0.47
|
<0.001
|
0.48
|
0.28 – 0.82
|
0.008
|
0.48
|
0.28 – 0.83
|
0.008
|
cor n400meanCPz logSurprisalText
|
0.36
|
0.36 – 0.36
|
<0.001
|
|
|
|
|
|
|
cor n400minCPz logSurprisalText
|
|
|
|
0.33
|
0.08 – 1.38
|
0.130
|
|
|
|
cor n400maxCPz logSurprisalText
|
|
|
|
|
|
|
0.60
|
0.16 – 2.21
|
0.442
|
|
Random Effects
|
|
σ2
|
3.29
|
3.29
|
3.29
|
|
τ00
|
1.27 pID
|
1.26 pID
|
1.22 pID
|
|
|
0.69 identifier
|
0.68 identifier
|
0.66 identifier
|
|
τ11
|
0.07 pID.cor_n400meanCPz_logSurprisalText
|
0.04 pID.cor_n400minCPz_logSurprisalText
|
0.95 pID.cor_n400maxCPz_logSurprisalText
|
|
ρ01
|
-1.00 pID
|
-1.00 pID
|
-1.00 pID
|
|
ICC
|
0.37
|
0.37
|
0.38
|
|
N
|
20 identifier
|
20 identifier
|
20 identifier
|
|
|
91 pID
|
91 pID
|
91 pID
|
|
Observations
|
287
|
287
|
287
|
|
Marginal R2 / Conditional R2
|
0.023 / 0.389
|
0.021 / 0.384
|
0.005 / 0.381
|
tab_model(m.mw.mean)
|
|
MW
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
0.47
|
0.47 – 0.47
|
<0.001
|
cor n400meanCPz logSurprisalText
|
0.36
|
0.36 – 0.36
|
<0.001
|
|
Random Effects
|
|
σ2
|
3.29
|
|
τ00 pID
|
1.27
|
|
τ00 identifier
|
0.69
|
|
τ11 pID.cor_n400meanCPz_logSurprisalText
|
0.07
|
|
ρ01 pID
|
-1.00
|
|
ICC
|
0.37
|
|
N identifier
|
20
|
|
N pID
|
91
|
|
Observations
|
287
|
|
Marginal R2 / Conditional R2
|
0.023 / 0.389
|
tab_model(m.mw.all)
|
|
MW
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
0.47
|
0.27 – 0.83
|
0.010
|
cor n400meanCPz logSurprisalText
|
0.16
|
0.00 – 25.31
|
0.474
|
cor n400minCPz logSurprisalText
|
1.06
|
0.02 – 44.93
|
0.976
|
cor n400maxCPz logSurprisalText
|
2.40
|
0.12 – 46.38
|
0.563
|
|
Random Effects
|
|
σ2
|
3.29
|
|
τ00 pID
|
1.32
|
|
τ00 identifier
|
0.71
|
|
ICC
|
0.38
|
|
N identifier
|
20
|
|
N pID
|
91
|
|
Observations
|
287
|
|
Marginal R2 / Conditional R2
|
0.031 / 0.401
|
emmeans(m.mw.mean, pairwise ~ cor_n400meanCPz_logSurprisalText, p.adjust = "fdr")
## $emmeans
## cor_n400meanCPz_logSurprisalText emmean SE df asymp.LCL asymp.UCL
## -0.002209 -0.7456 0.001643 Inf -0.7488 -0.7424
##
## Results are given on the logit (not the response) scale.
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## (nothing) nonEst NA NA NA NA
##
## Note: contrasts are still on the logit scale
Does page-average surprisal predict MW
m.mw.sp <- glmer(MW ~ meanLogSurprisalText + (1+meanLogSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
tab_model(m.mw.sp)
|
|
MW
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
0.25
|
0.08 – 0.81
|
0.020
|
|
meanLogSurprisalText
|
1.96
|
0.61 – 6.24
|
0.256
|
|
Random Effects
|
|
σ2
|
3.29
|
|
τ00 pID
|
1.91
|
|
τ11 pID.meanLogSurprisalText
|
4.98
|
|
ρ01 pID
|
-0.93
|
|
ICC
|
0.36
|
|
N pID
|
91
|
|
Observations
|
287
|
|
Marginal R2 / Conditional R2
|
0.010 / 0.367
|